import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import plotly.express as px
# Load the Netflix Originals dataset; the CSV is read as latin-1.
csv_path = 'C:/Users/Sena_Armagan/Downloads/global_ai/proje_2/NetflixOriginals.csv'
file = pd.read_csv(csv_path, encoding='latin-1')
# Preview the first ten rows.
file.head(10)
| Title | Genre | Premiere | Runtime | IMDB Score | Language | |
|---|---|---|---|---|---|---|
| 0 | Enter the Anime | Documentary | August 5, 2019 | 58 | 2.5 | English/Japanese |
| 1 | Dark Forces | Thriller | August 21, 2020 | 81 | 2.6 | Spanish |
| 2 | The App | Science fiction/Drama | December 26, 2019 | 79 | 2.6 | Italian |
| 3 | The Open House | Horror thriller | January 19, 2018 | 94 | 3.2 | English |
| 4 | Kaali Khuhi | Mystery | October 30, 2020 | 90 | 3.4 | Hindi |
| 5 | Drive | Action | November 1, 2019 | 147 | 3.5 | Hindi |
| 6 | Leyla Everlasting | Comedy | December 4, 2020 | 112 | 3.7 | Turkish |
| 7 | The Last Days of American Crime | Heist film/Thriller | June 5, 2020 | 149 | 3.7 | English |
| 8 | Paradox | Musical/Western/Fantasy | March 23, 2018 | 73 | 3.9 | English |
| 9 | Sardar Ka Grandson | Comedy | May 18, 2021 | 139 | 4.1 | Hindi |
# Dimensions of the loaded frame as a (rows, columns) tuple.
file.shape
(584, 6)
# Count missing values per column (isna is the canonical name for isnull).
file.isna().sum()
Title 0 Genre 0 Premiere 0 Runtime 0 IMDB Score 0 Language 0 dtype: int64
# The fifteen most frequent values of the Language column with their counts.
# value_counts() already returns them sorted by frequency, so no separate
# Counter pass is needed.
file_language = file["Language"].value_counts().head(15)
file_language
English 401 Hindi 33 Spanish 31 French 20 Italian 14 Portuguese 12 Indonesian 9 Japanese 6 Korean 6 German 5 Turkish 5 English/Spanish 5 Polish 3 Dutch 3 Marathi 3 Name: Language, dtype: int64
# Keep only the Language and Runtime columns for the long-film analysis.
file_two = file.loc[:, ["Language", "Runtime"]]
# Select rows whose Runtime is at least 90, then count how often each
# (Language, Runtime) pair occurs. A plain boolean mask replaces the earlier
# per-group filter: every (Language, Runtime) group holds a single runtime
# value, so the group mean test was just this row-level comparison.
long_films = file_two[file_two["Runtime"] >= 90]
groups = long_films.value_counts()
groups
Language Runtime
English 97 19
98 15
95 14
92 13
100 13
..
German 106 1
94 1
93 1
92 1
Turkish 114 1
Length: 176, dtype: int64
# Bar chart of the (Language, Runtime) pair counts computed above.
groups.plot.bar()
<AxesSubplot:xlabel='Language,Runtime'>
# Tally how many rows are (True) and are not (False) exactly "Documentary".
is_documentary = file["Genre"].eq("Documentary")
file_doc = Counter(is_documentary)
file_doc
Counter({True: 159, False: 425})
# Parse the textual Premiere column into a datetime column.
file["Date"] = pd.to_datetime(file["Premiere"])

# Documentaries premiered between 2019-01-01 and 2020-06-01, both inclusive.
documentary_rows = file["Genre"] == "Documentary"
in_date_window = file["Date"].between("2019-01-01", "2020-06-01")
file_doc = file.loc[documentary_rows & in_date_window]

# Horizontal bars: IMDB score on x, one bar per title on y.
grafik = px.bar(
    file_doc,
    x="IMDB Score",
    y="Title",
    color_discrete_sequence=['purple'],
)
grafik.show()
# Films whose Language is exactly "English" (multi-language entries such as
# "English/Spanish" are not matched by the equality test).
file_lang = file[file["Language"] == "English"]
# Order them from the highest to the lowest IMDB score.
file_score = file_lang.sort_values(by="IMDB Score", ascending=False)
file_score
| Title | Genre | Premiere | Runtime | IMDB Score | Language | Date | |
|---|---|---|---|---|---|---|---|
| 583 | David Attenborough: A Life on Our Planet | Documentary | October 4, 2020 | 83 | 9.0 | English | 2020-10-04 |
| 581 | Springsteen on Broadway | One-man show | December 16, 2018 | 153 | 8.5 | English | 2018-12-16 |
| 579 | Taylor Swift: Reputation Stadium Tour | Concert Film | December 31, 2018 | 125 | 8.4 | English | 2018-12-31 |
| 578 | Ben Platt: Live from Radio City Music Hall | Concert Film | May 20, 2020 | 85 | 8.4 | English | 2020-05-20 |
| 577 | Dancing with the Birds | Documentary | October 23, 2019 | 51 | 8.3 | English | 2019-10-23 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 14 | Mercy | Thriller | November 22, 2016 | 90 | 4.2 | English | 2016-11-22 |
| 10 | Searching for Sheela | Documentary | April 22, 2021 | 58 | 4.1 | English | 2021-04-22 |
| 8 | Paradox | Musical/Western/Fantasy | March 23, 2018 | 73 | 3.9 | English | 2018-03-23 |
| 7 | The Last Days of American Crime | Heist film/Thriller | June 5, 2020 | 149 | 3.7 | English | 2020-06-05 |
| 3 | The Open House | Horror thriller | January 19, 2018 | 94 | 3.2 | English | 2018-01-19 |
401 rows × 7 columns
# English films with an IMDB score of 9 or above, coloured by genre.
file_sc = file_score.loc[file_score["IMDB Score"] >= 9]
grafik = px.bar(file_sc, x="Title", y="IMDB Score", color="Genre")
grafik.show()
# Rows whose Language is exactly "Hindi".
file_dil = file.loc[file["Language"].eq("Hindi")]
file_dil
# Average runtime of those films, reported under the Language label.
file_dil.groupby(by="Language")["Runtime"].mean()
Language Hindi 115.787879 Name: Runtime, dtype: float64
# Number of films per Genre value, most frequent first. value_counts()
# supplies the tally directly, so the previously overwritten Counter pass
# is dropped.
file_genre = file["Genre"].value_counts()
file_genre
Documentary 159
Drama 77
Comedy 49
Romantic comedy 39
Thriller 33
...
Romantic comedy-drama 1
Heist film/Thriller 1
Musical/Western/Fantasy 1
Horror anthology 1
Animation/Christmas/Comedy/Adventure 1
Name: Genre, Length: 115, dtype: int64
# The fifteen most frequent Genre values; this is the ordering used by the
# count plot below. The previously overwritten Counter pass is dropped.
file_genre = file["Genre"].value_counts().head(15)
file_genre
Documentary 159 Drama 77 Comedy 49 Romantic comedy 39 Thriller 33 Comedy-drama 14 Crime drama 11 Biopic 9 Horror 9 Action 7 Romance 6 Concert Film 6 Aftershow / Interview 6 Animation 5 Action comedy 5 Name: Genre, dtype: int64
# Count plot restricted (via `order`) to the genres held in file_genre.
plt.subplots(figsize=(20, 10))
gca = plt.gca()
sns.countplot(x=file.Genre, order=file_genre.index, color="purple", ax=gca)
gca.set_title(
    'Top 15 Genre',
    color="dimgrey",
    fontsize=30,
    pad=20,
    fontname="Comic Sans MS",
)
gca.set_xlabel(
    "Genre", fontsize=20, color="dimgrey", loc="center", fontname="Comic Sans MS"
)
gca.set_ylabel(
    "Count", fontsize=20, color="dimgrey", loc="top", fontname="Comic Sans MS"
)
# Thin white horizontal grid lines over a light grey plotting area.
gca.grid(linestyle="-", linewidth=0.25, axis="y", color="white")
gca.set_facecolor("lightgrey")
# The three most frequent Language values with their counts. The previously
# overwritten Counter pass is dropped.
file_langu = file["Language"].value_counts().head(3)
file_langu
English 401 Hindi 33 Spanish 31 Name: Language, dtype: int64
# Pie chart of the three most frequent languages; percentages are relative
# to the total of these three counts, not to the whole dataset.
c = file["Language"].value_counts().iloc[:3]
labels = c.index
explode = (0, 0, 0)  # no wedge is pulled out of the pie
colors = ["orange", "red", "blue"]

fig, ax = plt.subplots(figsize=(5, 5), dpi=80)
ax.pie(
    c.values,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.2f%%',
    shadow=True,
)
plt.show()
# Ten rows with the highest IMDB score.
ranked_by_score = file.sort_values(by="IMDB Score", ascending=False)
groups = ranked_by_score.head(10)
groups
| Title | Genre | Premiere | Runtime | IMDB Score | Language | Date | |
|---|---|---|---|---|---|---|---|
| 583 | David Attenborough: A Life on Our Planet | Documentary | October 4, 2020 | 83 | 9.0 | English | 2020-10-04 |
| 582 | Emicida: AmarElo - It's All For Yesterday | Documentary | December 8, 2020 | 89 | 8.6 | Portuguese | 2020-12-08 |
| 581 | Springsteen on Broadway | One-man show | December 16, 2018 | 153 | 8.5 | English | 2018-12-16 |
| 580 | Winter on Fire: Ukraine's Fight for Freedom | Documentary | October 9, 2015 | 91 | 8.4 | English/Ukranian/Russian | 2015-10-09 |
| 579 | Taylor Swift: Reputation Stadium Tour | Concert Film | December 31, 2018 | 125 | 8.4 | English | 2018-12-31 |
| 578 | Ben Platt: Live from Radio City Music Hall | Concert Film | May 20, 2020 | 85 | 8.4 | English | 2020-05-20 |
| 577 | Dancing with the Birds | Documentary | October 23, 2019 | 51 | 8.3 | English | 2019-10-23 |
| 576 | Cuba and the Cameraman | Documentary | November 24, 2017 | 114 | 8.3 | English | 2017-11-24 |
| 573 | Klaus | Animation/Christmas/Comedy/Adventure | November 15, 2019 | 97 | 8.2 | English | 2019-11-15 |
| 571 | 13th | Documentary | October 7, 2016 | 100 | 8.2 | English | 2016-10-07 |
# Genre and score of the ten highest-rated rows. The column selection and the
# ranking are done in one step; the earlier standalone selection was
# overwritten immediately and has been removed.
file_gi = (
    file[["Genre", "IMDB Score"]]
    .sort_values(["IMDB Score"], ascending=False)
    .head(10)
)
file_gi
| Genre | IMDB Score | |
|---|---|---|
| 583 | Documentary | 9.0 |
| 582 | Documentary | 8.6 |
| 581 | One-man show | 8.5 |
| 580 | Documentary | 8.4 |
| 579 | Concert Film | 8.4 |
| 578 | Concert Film | 8.4 |
| 577 | Documentary | 8.3 |
| 576 | Documentary | 8.3 |
| 573 | Animation/Christmas/Comedy/Adventure | 8.2 |
| 571 | Documentary | 8.2 |
# Bar chart of the top-10 rows by genre. Rows that share a genre land on the
# same x position, so their scores are stacked within that bar.
grafik = px.bar(
    file_gi,
    x="Genre",
    y="IMDB Score",
    color="Genre",
    title="Top 10 High Genre",
)
grafik.show()
# Ten rows with the longest Runtime.
ranked_by_runtime = file.sort_values(by="Runtime", ascending=False)
file_run = ranked_by_runtime.head(10)
file_run
| Title | Genre | Premiere | Runtime | IMDB Score | Language | Date | |
|---|---|---|---|---|---|---|---|
| 561 | The Irishman | Crime drama | November 27, 2019 | 209 | 7.8 | English | 2019-11-27 |
| 328 | Da 5 Bloods | War drama | June 12, 2020 | 155 | 6.5 | English | 2020-06-12 |
| 581 | Springsteen on Broadway | One-man show | December 16, 2018 | 153 | 8.5 | English | 2018-12-16 |
| 247 | Citation | Drama | November 6, 2020 | 151 | 6.2 | English | 2020-11-06 |
| 284 | The Forest of Love | Drama | October 11, 2019 | 151 | 6.3 | Japanese | 2019-10-11 |
| 509 | Raat Akeli Hai | Thriller | July 31, 2020 | 149 | 7.3 | Hindi | 2020-07-31 |
| 7 | The Last Days of American Crime | Heist film/Thriller | June 5, 2020 | 149 | 3.7 | English | 2020-06-05 |
| 543 | Ludo | Anthology/Dark comedy | November 12, 2020 | 149 | 7.6 | Hindi | 2020-11-12 |
| 194 | Army of the Dead | Zombie/Heist | May 21, 2021 | 148 | 5.9 | English | 2021-05-21 |
| 5 | Drive | Action | November 1, 2019 | 147 | 3.5 | Hindi | 2019-11-01 |
# One bar per title showing its runtime.
grafik = px.bar(
    data_frame=file_run,
    x="Title",
    y="Runtime",
    title="TOP 10 High Runtime Films",
)
grafik.show()
# Derive the premiere year from the parsed Date column.
file["Year"] = file["Date"].dt.year
# Films per year, most frequent first (at most ten years). The previous
# Counter over the full Date column was overwritten immediately and counted
# individual dates rather than years, so it has been removed.
year = file["Year"].value_counts().head(10)
year
2020 183 2019 125 2018 99 2021 71 2017 66 2016 30 2015 9 2014 1 Name: Year, dtype: int64
# Count plot of films per year; bars follow the frequency order held in `year`.
plt.subplots(figsize=(20, 10))
gca = plt.gca()
sns.countplot(x=file.Year, order=year.index, color="blue", ax=gca)
gca.set_title(
    'Yıllara Göre Film Sayısı ',
    color="dimgrey",
    fontsize=30,
    pad=20,
    fontname="Comic Sans MS",
)
gca.set_xlabel(
    "Year", fontsize=20, color="dimgrey", loc="center", fontname="Comic Sans MS"
)
gca.set_ylabel(
    "Count", fontsize=20, color="dimgrey", loc="top", fontname="Comic Sans MS"
)
# Thin white horizontal grid lines over a light grey plotting area.
gca.grid(linestyle="-", linewidth=0.25, axis="y", color="white")
gca.set_facecolor("lightgrey")
# Language and score columns only.
file_li = file[["Language", "IMDB Score"]]
# Mean IMDB score per language, lowest average first.
mean_score_by_language = file_li.groupby("Language")["IMDB Score"].mean()
groups = mean_score_by_language.sort_values()
groups
Language Malay 4.200000 English/Japanese 4.400000 Norwegian 5.100000 Filipino 5.100000 Polish 5.166667 Thai 5.450000 Swedish 5.500000 Italian 5.542857 Spanish/Basque 5.600000 German 5.640000 Turkish 5.660000 French 5.770000 Dutch 5.800000 Indonesian 5.844444 Korean 5.916667 Hindi 5.981818 Marathi 6.066667 Portuguese 6.216667 English/Spanish 6.220000 Spanish 6.303226 English 6.380050 Japanese 6.400000 Spanish/Catalan 6.400000 English/Taiwanese/Mandarin 6.500000 English/Swedish 6.500000 Thia/English 6.700000 Georgian 6.800000 English/Mandarin 7.050000 Bengali 7.100000 Tamil 7.200000 Khmer/English/French 7.200000 English/Arabic 7.300000 English/Russian 7.300000 Spanish/English 7.300000 English/Korean 7.300000 English/Hindi 7.300000 English/Akan 7.700000 English/Ukranian/Russian 8.400000 Name: IMDB Score, dtype: float64
# Plot the mean IMDB score per language, lowest average first.
# Passing the raw rows to px.bar would stack every film's score into one bar
# per language, showing a sum that mostly reflects how many films a language
# has. Aggregating first makes each bar the average score.
lang_mean_score = (
    file_li.groupby("Language", as_index=False)["IMDB Score"]
    .mean()
    .sort_values("IMDB Score")
)
grafik = px.bar(lang_mean_score, x="Language", y="IMDB Score")
grafik.show()
# Year and runtime columns only.
file_lr = file[["Year", "Runtime"]]
# Mean runtime per premiere year, longest average first.
mean_runtime_by_year = file_lr.groupby("Year")["Runtime"].mean()
groups = mean_runtime_by_year.sort_values(ascending=False)
groups
Year 2021 101.605634 2018 95.000000 2020 94.994536 2017 92.863636 2016 92.200000 2015 90.444444 2019 86.848000 2014 81.000000 Name: Runtime, dtype: float64
# Language and genre columns only.
file_lg = file.loc[:, ["Language", "Genre"]]
# Number of films for every (Language, Genre) pair that occurs in the data.
# Both columns are grouping keys, so there is nothing left for mean() to
# average and it produced a frame with zero columns; size() reports the
# group sizes instead.
groups = file_lg.groupby(["Language", "Genre"]).size()
groups
| Language | Genre |
|---|---|
| Bengali | Documentary |
| Dutch | Crime drama |
| Fantasy | |
| Romantic comedy | |
| English | Action |
| ... | ... |
| Thai | Horror |
| Thia/English | Documentary |
| Turkish | Comedy |
| Drama | |
| Romance |
204 rows × 0 columns